• Each cell can add a column to our state data, you can only run it once, or you will get another the same columns

In [1]:
import xlrd
import pandas as pd
import numpy as np
statelist=["AK","AL","AR","AZ","CA","CO","CT","DC","DE","FL","GA","HI","IA","ID","IL","IN","KS","KY","LA","MA","MD","ME","MI","MN","MO","MS","MT","NC","ND","NE","NH","NJ","NM","NV","NY","OH","OK","OR","PA","RI","SC","SD","TN","TX","US","UT","VA","VT","WA","WI","WV","WY"]
len(statelist)
statelist[44]


Out[1]:
'US'

In [7]:
#add CLPRB columns
CLPRB=pd.read_excel("Data/Original Data/more MSN/CLPRB.xlsx")
for j in range(52):
    dftemp = pd.read_csv('Data/Data_States/%s.csv' % (statelist[j]))
    dftemp.rename(columns={'Unnamed: 0':'Year','Unnamed: 5':'GDP'}, inplace = True)
    dftemp['CLPRB']=0 
    if j==44:  #for state US, missing data of GDP
        continue
    #else: dftemp.drop(dftemp.index[55],inplace=True) #delete last line of GDP
    data = xlrd.open_workbook('Data/Original Data/more MSN/CLPRB.xlsx') # open xlsx file 
    table = data.sheets()[j] # open sheet j
    nrows = table.nrows # get how many lines
    for i in range(1,nrows): #cycle in the table
        dftemp['CLPRB'][i-1] = table.row_values(i)[3] # columns 4 
    dftemp.to_csv('Data/Data_States/%s.csv'% (statelist[j]), encoding='utf-8', index=False)


C:\Users\rahul\Miniconda3\lib\site-packages\ipykernel\__main__.py:14: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [13]:
#add CLPRB columns to US state
j=44
CLPRB=pd.read_excel("..\CLPRB.xlsx")
dftemp = pd.read_csv('%s.csv' % (statelist[j]))
dftemp.rename(columns={'Unnamed: 0':'Year','Unnamed: 5':'GDP'}, inplace = True)
dftemp['CLPRB']=0 
data = xlrd.open_workbook('..\CLPRB.xlsx') # open xlsx file 
table = data.sheets()[j] # open sheet j
nrows = table.nrows # get how many lines
for i in range(1,nrows): #cycle in the table
    dftemp['CLPRB'][i-1] = table.row_values(i)[3] # columns 4 
dftemp.to_csv('%s.csv'% (statelist[j]), encoding='utf-8', index=False)


C:\Users\RicardoZhang\Miniconda3\lib\site-packages\ipykernel\__main__.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [6]:
#add EMFDB columns
EMFDB=pd.read_excel("Data/Original Data/more MSN/EMFDB.xlsx")
for i in range(52):
    tempdf=EMFDB[EMFDB.StateCode=='%s'% (statelist[i])]
    del tempdf['MSN']
    del tempdf['StateCode']
    df = pd.read_csv('Data/Data_States/%s.csv' % (statelist[i]))
    df_r=pd.merge(df, tempdf,on='Year',how='outer')
    df_r.rename(columns={'Data':'EMFDB'}, inplace = True)
    df_r.to_csv('Data/Data_States/%s.csv'% (statelist[i]), encoding='utf-8', index=False)

In [16]:
#add ENPRP columns
ENPRP=pd.read_excel("..\ENPRP.xlsx")
for i in range(52):
    tempdf=ENPRP[ENPRP.StateCode=='%s'% (statelist[i])]
    del tempdf['MSN']
    del tempdf['StateCode']
    df = pd.read_csv('%s.csv' % (statelist[i]))
    df_r=pd.merge(df, tempdf,on='Year',how='outer')
    df_r.rename(columns={'Data':'ENPRP'}, inplace = True)
    df_r.to_csv('%s.csv'% (statelist[i]), encoding='utf-8', index=False)

In [17]:
#add NGMPB columns
NGMPB=pd.read_excel(open("..\\NGMPB.xlsx",'rb'))
for i in range(52):
    tempdf=NGMPB[NGMPB.StateCode=='%s'% (statelist[i])]
    del tempdf['MSN']
    del tempdf['StateCode']
    df = pd.read_csv('%s.csv' % (statelist[i]))
    df_r=pd.merge(df, tempdf,on='Year',how='outer')
    df_r.rename(columns={'Data':'NGMPB'}, inplace = True)
    df_r.to_csv('%s.csv'% (statelist[i]), encoding='utf-8', index=False)

In [18]:
#add PAPRB columns
PAPRB=pd.read_excel("..\PAPRB.xlsx")
for i in range(52):
    tempdf=PAPRB[PAPRB.StateCode=='%s'% (statelist[i])]
    del tempdf['MSN']
    del tempdf['StateCode']
    df = pd.read_csv('%s.csv' % (statelist[i]))
    df_r=pd.merge(df, tempdf,on='Year',how='outer')
    df_r.rename(columns={'Data':'PAPRB'}, inplace = True)
    df_r.to_csv('%s.csv'% (statelist[i]), encoding='utf-8', index=False)

In [ ]: